Fix support for API concurrency > 1 by updating Uvicorn launch logic #408
Fix support for API concurrency > 1 by updating Uvicorn launch logic #408 — DifferentialityDevelopment wants to merge 3 commits into runpod:main
Conversation
Fix for the bug that produces the following message: "WARNING: You must pass the application as an import string to enable 'reload' or 'workers'."
|
I notice that running with concurrency does not work at all on Windows. I was going through all this to be able to stress-test my application and gather data on per-request memory usage, peak memory usage at different concurrency levels, etc. |
|
Unit tests are broken because of the addition of gunicorn as part of api_concurrency > 1. It should be a simple package install. |
There was a problem hiding this comment.
Pull request overview
This PR aims to fix local serverless worker startup when --rp_api_concurrency > 1 by changing how Uvicorn is launched for multi-worker mode (to avoid Uvicorn’s requirement to use an import string when workers is set).
Changes:
- Adds branching logic in `WorkerAPI.start_uvicorn()` to use a different `uvicorn.run(...)` invocation for `api_concurrency > 1` vs `== 1`.
- Switches the multi-worker path to an import-string target and enables `factory=True`.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| if api_concurrency > 1: | ||
| # For multiple workers, we need to use the module:app format | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| "runpod.serverless.modules.rp_fastapi:app", | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=int(api_concurrency), | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False, | ||
| factory=True | ||
| ) | ||
| else: | ||
| # For single worker, we can use the app instance directly | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| self.rp_app, | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=1, | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False | ||
| ) | ||
|
|
There was a problem hiding this comment.
In the multi-worker branch, the import string points to runpod.serverless.modules.rp_fastapi:app, but this module does not define a module-level app symbol. Additionally, factory=True tells Uvicorn to treat that import as an app factory callable; even if app existed as a FastAPI instance, this would be invoked like a zero-arg function and fail. Define a proper module-level factory (and reference it), or expose a module-level ASGI app and remove factory=True so workers>1 can start reliably.
| if api_concurrency > 1: | |
| # For multiple workers, we need to use the module:app format | |
| import uvicorn.workers | |
| uvicorn.run( | |
| "runpod.serverless.modules.rp_fastapi:app", | |
| host=api_host, | |
| port=int(api_port), | |
| workers=int(api_concurrency), | |
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | |
| access_log=False, | |
| factory=True | |
| ) | |
| else: | |
| # For single worker, we can use the app instance directly | |
| import uvicorn.workers | |
| uvicorn.run( | |
| self.rp_app, | |
| host=api_host, | |
| port=int(api_port), | |
| workers=1, | |
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | |
| access_log=False | |
| ) | |
| import uvicorn.workers | |
| # `self.rp_app` is the ASGI app instance created for this WorkerAPI. | |
| # Running Uvicorn with an import string and `factory=True` requires a | |
| # module-level callable that is not defined in this module, so always | |
| # launch the existing app instance directly. | |
| uvicorn.run( | |
| self.rp_app, | |
| host=api_host, | |
| port=int(api_port), | |
| workers=1, | |
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | |
| access_log=False | |
| ) |
| # For multiple workers, we need to use the module:app format | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| "runpod.serverless.modules.rp_fastapi:app", | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=int(api_concurrency), | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False, | ||
| factory=True | ||
| ) | ||
| else: | ||
| # For single worker, we can use the app instance directly | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| self.rp_app, |
There was a problem hiding this comment.
import uvicorn.workers is executed in both branches but the module isn’t used. Importing uvicorn.workers typically requires the optional gunicorn dependency; since this repo doesn’t declare gunicorn, this can raise ModuleNotFoundError and break even the single-worker path. Remove this import (or guard it behind an explicit optional dependency check if you truly need Gunicorn).
| def start_uvicorn(self, api_host="localhost", api_port=8000, api_concurrency=1): | ||
| """ | ||
| Starts the Uvicorn server. | ||
| """ | ||
| uvicorn.run( | ||
| self.rp_app, | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=int(api_concurrency), | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False, | ||
| ) | ||
| if api_concurrency > 1: | ||
| # For multiple workers, we need to use the module:app format | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| "runpod.serverless.modules.rp_fastapi:app", | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=int(api_concurrency), | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False, | ||
| factory=True | ||
| ) | ||
| else: | ||
| # For single worker, we can use the app instance directly | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| self.rp_app, | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=1, | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False | ||
| ) |
There was a problem hiding this comment.
start_uvicorn now has branching behavior for api_concurrency > 1, but there are no unit tests asserting the parameters passed to uvicorn.run in either branch (especially the multi-worker import-string path). Adding targeted tests that patch runpod.serverless.modules.rp_fastapi.uvicorn.run and exercise both concurrency modes will help prevent regressions like missing symbols / wrong factory settings.
| if api_concurrency > 1: | ||
| # For multiple workers, we need to use the module:app format | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| "runpod.serverless.modules.rp_fastapi:app", | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=int(api_concurrency), | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False, | ||
| factory=True | ||
| ) | ||
| else: | ||
| # For single worker, we can use the app instance directly | ||
| import uvicorn.workers | ||
| uvicorn.run( | ||
| self.rp_app, | ||
| host=api_host, | ||
| port=int(api_port), | ||
| workers=1, | ||
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | ||
| access_log=False | ||
| ) | ||
|
|
There was a problem hiding this comment.
Enabling workers > 1 means requests will be served by multiple processes. JobsProgress persists to disk, but methods used by the API (job_list.get(...) in _sim_stream/_sim_status and others) don’t reload state before reading, so a worker may not see jobs added by a different worker and return “Job ID not found”. If multi-worker mode is supported, consider reloading state on reads (e.g., in JobsProgress.get() / __iter__) or otherwise ensuring cross-process consistency for job tracking.
| if api_concurrency > 1: | |
| # For multiple workers, we need to use the module:app format | |
| import uvicorn.workers | |
| uvicorn.run( | |
| "runpod.serverless.modules.rp_fastapi:app", | |
| host=api_host, | |
| port=int(api_port), | |
| workers=int(api_concurrency), | |
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | |
| access_log=False, | |
| factory=True | |
| ) | |
| else: | |
| # For single worker, we can use the app instance directly | |
| import uvicorn.workers | |
| uvicorn.run( | |
| self.rp_app, | |
| host=api_host, | |
| port=int(api_port), | |
| workers=1, | |
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | |
| access_log=False | |
| ) | |
| if int(api_concurrency) > 1: | |
| raise ValueError( | |
| "api_concurrency > 1 is not supported because job tracking state is " | |
| "not synchronized across multiple Uvicorn worker processes." | |
| ) | |
| # For a single worker, we can use the app instance directly. | |
| import uvicorn.workers | |
| uvicorn.run( | |
| self.rp_app, | |
| host=api_host, | |
| port=int(api_port), | |
| workers=1, | |
| log_level=os.environ.get("UVICORN_LOG_LEVEL", "info"), | |
| access_log=False | |
| ) |

Fix for the bug that occurs when launching the serverless worker locally with API concurrency > 1.
I got the following error:
WARNING: You must pass the application as an import string to enable 'reload' or 'workers'.
This is described in the issue I created here:
#407